In [2]:
import graphlab
In [8]:
# Load the song listening data stored at 'song_data.gl/' into an SFrame.
song_data = graphlab.SFrame(data='song_data.gl/')
In [11]:
# Preview the first two rows of the loaded data.
song_data.head(n=2)
Out[11]:
In [12]:
# Point GraphLab Canvas at the 'ipynb' target -- presumably so that .show()
# output is rendered inside this notebook (confirm against the Canvas docs).
graphlab.canvas.set_target(target='ipynb')
In [13]:
# Visualize the 'song' column in Canvas.
song_data.select_column('song').show()
In [14]:
# Number of rows in song_data.
len(song_data)
Out[14]:
In [52]:
# Distinct user ids appearing in song_data, in sorted order.
users = (
    song_data['user_id']
    .unique()
    .sort()
)
In [53]:
# Number of distinct users in song_data.
len(users)
Out[53]:
In [54]:
# Split the rows of song_data into two parts using fraction 0.8; the fixed
# seed keeps the split repeatable across runs.
train_data, test_data = song_data.random_split(fraction=0.8, seed=0)
In [55]:
# Fit a popularity recommender on the training split, treating 'user_id' as
# the user column and 'song' as the item column.
popularity_model = graphlab.popularity_recommender.create(
    train_data, user_id='user_id', item_id='song')
In [56]:
# Popularity-model recommendations for the first user in the sorted `users` array.
popularity_model.recommend(users=[users[0]])
Out[56]:
In [57]:
# Popularity-model recommendations for the second user in the sorted `users`
# array, for comparison with the first user's list.
popularity_model.recommend(users=[users[1]])
Out[57]:
In [58]:
# Fit an item-similarity recommender on the same training split and with the
# same user/item columns as the popularity model.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data, user_id='user_id', item_id='song')
In [59]:
# Item-similarity recommendations for the first user in the sorted `users` array.
personalized_model.recommend(users=[users[0]])
Out[59]:
In [60]:
# Item-similarity recommendations for the second user in the sorted `users`
# array, for comparison with the first user's list.
personalized_model.recommend(users=[users[1]])
Out[60]:
In [61]:
# Ask the item-similarity model for items similar to this song.
# The string must match a value of the 'song' column (the model's item_id).
personalized_model.get_similar_items(['With Or Without You - U2'])
Out[61]:
In [62]:
# Same similar-item lookup for a second song.
# The string must match a value of the 'song' column (the model's item_id).
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])
Out[62]:
In [63]:
%matplotlib inline
model_performance = graphlab.recommender.util.compare_models(test_data,
[popularity_model, personalized_model],
user_sample=0.05)
In [64]:
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots()
pr_curves_by_model = [res['precision_recall_overall'] for res in model_performance]
pr_curve = pr_curves_by_model[0].sort('recall')
ax.plot(list(pr_curve['recall']), list(pr_curve['precision']),
'blue', label='M1')
pr_curve = pr_curves_by_model[1].sort('recall')
ax.plot(list(pr_curve['recall']), list(pr_curve['precision']),
'green', label='M2')
ax.set_title('Precision-Recall Averaged Over Users')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.legend()
fig.show()
In [66]:
# Sorted distinct user ids that have at least one 'Kanye West' row in song_data.
user_kanye_west = (
    song_data[song_data['artist'] == 'Kanye West']['user_id']
    .unique()
    .sort()
)
In [67]:
# Number of distinct users with at least one 'Kanye West' row.
len(user_kanye_west)
Out[67]:
In [69]:
# Sorted distinct user ids that have at least one 'Foo Fighters' row in song_data.
user_foo_fighters = (
    song_data[song_data['artist'] == 'Foo Fighters']['user_id']
    .unique()
    .sort()
)
In [70]:
# Number of distinct users with at least one 'Foo Fighters' row.
len(user_foo_fighters)
Out[70]:
In [71]:
# Sorted distinct user ids that have at least one 'Taylor Swift' row in song_data.
user_taylor_swift = (
    song_data[song_data['artist'] == 'Taylor Swift']['user_id']
    .unique()
    .sort()
)
In [72]:
# Number of distinct users with at least one 'Taylor Swift' row.
len(user_taylor_swift)
Out[72]:
In [73]:
# Sorted distinct user ids that have at least one 'Lady GaGa' row in song_data.
# The artist string is matched exactly, including its capitalisation.
user_lady_gaga = (
    song_data[song_data['artist'] == 'Lady GaGa']['user_id']
    .unique()
    .sort()
)
In [74]:
# Number of distinct users with at least one 'Lady GaGa' row.
len(user_lady_gaga)
Out[74]:
In [89]:
# Aggregate song_data per artist: one row per 'artist' value, with
# 'listen_count' holding the sum of that artist's listen_count values.
artist_listen_count = song_data.groupby(
    key_columns='artist',
    operations={'listen_count': graphlab.aggregate.SUM('listen_count')},
)
In [94]:
# Summed listen_count for 'Taylor Swift' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist']=='Taylor Swift']
Out[94]:
In [95]:
# Summed listen_count for 'Kings Of Leon' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist']=='Kings Of Leon']
Out[95]:
In [96]:
# Summed listen_count for 'Coldplay' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist']=='Coldplay']
Out[96]:
In [97]:
# Summed listen_count for 'Lady GaGa' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist']=='Lady GaGa']
Out[97]:
In [99]:
# Summed listen_count for 'William Tabbert' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist'] == 'William Tabbert']
Out[99]:
In [100]:
# Summed listen_count for 'Velvet Underground & Nico' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist']=='Velvet Underground & Nico']
Out[100]:
In [101]:
# Summed listen_count for 'Kanye West' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist']=='Kanye West']
Out[101]:
In [102]:
# Summed listen_count for 'The Cool Kids' (exact match on the artist column).
artist_listen_count[artist_listen_count['artist']=='The Cool Kids']
Out[102]:
In [108]:
# Despite its name, this holds user ids rather than data rows: the first
# 10,000 distinct user_id values of test_data, taken in sorted order
# (fewer if test_data contains fewer distinct users).
test_data_10000 = (
    test_data['user_id']
    .unique()
    .sort()
)[0:10000]
In [110]:
# Ask the item-similarity model for a single (k=1) recommendation for each of
# the selected test users.
recommended_songs = personalized_model.recommend(users=test_data_10000, k=1)
In [111]:
# Preview the first two recommendation rows.
recommended_songs.head(n=2)
Out[111]:
In [118]:
# Count how many times each song appears in recommended_songs and list the
# songs from most to least frequently recommended.
recommended_songs.groupby(
    key_columns='song',
    operations={'count': graphlab.aggregate.COUNT()},
).sort('count', ascending=False)
Out[118]:
In [ ]:
In [ ]: